rm(list=ls(all=TRUE))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(microbiome)
## Loading required package: phyloseq
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
##
## Attaching package: 'microbiome'
##
## The following object is masked from 'package:ggplot2':
##
## alpha
##
## The following object is masked from 'package:base':
##
## transform
library(ggh4x)
Read relative abundance dataset:
physeq <- read_rds("Data/phyloseq.rds")
physeq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 197 taxa and 55 samples ]
## sample_data() Sample Data: [ 55 samples by 26 sample variables ]
## tax_table() Taxonomy Table: [ 197 taxa by 8 taxonomic ranks ]
Extract taxon table:
tax_table <- physeq %>% tax_table() %>% as.data.frame() %>% rownames_to_column("OTU") %>% as_tibble()
tax_table
Sample attributes:
sample_attributes <- physeq %>% sample_data() %>% data.frame() %>% rownames_to_column("Sample") %>% colnames()
sample_attributes
## [1] "Sample" "ATTRIBUTE_SampleType"
## [3] "ATTRIBUTE_SampleTypeSub1" "NCBITaxonomy"
## [5] "YearOfAnalysis" "SubjectIdentifierAsRecorded"
## [7] "AgeInYears" "ATTRIBUTE_BiologicalSex"
## [9] "UBERONBodyPartName" "TermsofPosition"
## [11] "HealthStatus" "DOIDCommonName"
## [13] "ComorbidityListDOIDIndex" "SampleCollectionDateandTime"
## [15] "Country" "HumanPopulationDensity"
## [17] "LatitudeandLongitude" "DepthorAltitudeMeters"
## [19] "qiita_sample_name" "UniqueSubjectID"
## [21] "LifeStage" "UBERONOntologyIndex"
## [23] "DOIDOntologyIndex" "ATTRIBUTE_Ethnic_Group"
## [25] "ATTRIBUTE_Study_Group" "Sebumeter_Score"
## [27] "Skicon_Score"
Experimental groups:
physeq %>% sample_data() %>% as_tibble() %>% count(ATTRIBUTE_Study_Group)
Convert Phyloseq object to long format:
data_long <- physeq %>%
psmelt() %>% as_tibble()
data_long
What are the most abundant families across all samples?
family_abundance <- data_long %>%
group_by(Family) %>%
summarize(Abundance = sum(Abundance)/n_distinct(Sample)) %>%
arrange(-Abundance)
family_abundance
Select the largest families for downstream analysis:
major_families <- family_abundance$Family[1:6]
major_families
## [1] "Propionibacteriaceae" "Staphylococcaceae" "Polyomaviridae"
## [4] "Malasseziaceae" "Corynebacteriaceae" "Micrococcaceae"
Add additional column Major_Families to the long dataset as well as to the Phyloseq object:
data_long <- data_long %>%
mutate(
Major_Families = if_else(
Family %in% major_families,
Family, "Other families"
),
Major_Families = factor(Major_Families, levels = c(major_families, "Other families"))
)
tax_table(physeq) <- tax_table(physeq) %>%
as.data.frame() %>%
mutate(
Major_Families = if_else(
Family %in% major_families,
Family, "Other families"
)
) %>%
as.matrix()
Plot relative abundance of the major families:
data_long %>%
inner_join(
data_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Propionibacteriaceae = sum(Abundance[Family == "Propionibacteriaceae"]),
.groups = "drop_last"
) %>%
arrange(-Propionibacteriaceae) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Major_Families)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "bottom",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
Label samples with more than 50% Propionibacteriaceae in the Phyloseq object:
sample_data(physeq) <- sample_data(physeq) %>%
data.frame() %>%
rownames_to_column("Sample") %>%
inner_join(
data_long %>%
group_by(Sample) %>%
summarize(
Propionibacteriaceae = sum(Abundance[Family == "Propionibacteriaceae"]),
Polyomaviridae = sum(Abundance[Family == "Polyomaviridae"])
) %>%
mutate(
Propionibacteriaceae = Propionibacteriaceae > 0.5,
Polyomaviridae = Polyomaviridae > 0.1
),
by = "Sample"
) %>%
column_to_rownames("Sample")
Look at Propionibacteriaceae:
data_long <- data_long %>%
mutate(
Propionibacteria = if_else(
Family == "Propionibacteriaceae" & is.na(Species),
Genus, Species
)
)
Propionibacteria <- data_long %>%
filter(Family == "Propionibacteriaceae") %>%
group_by(Propionibacteria) %>%
summarize(Abundance = sum(Abundance)/n_distinct(Sample)) %>%
arrange(-Abundance) %>%
pull(Propionibacteria)
data_long %>%
inner_join(
data_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Propionibacteriaceae = sum(Abundance[Family == "Propionibacteriaceae"]),
.groups = "drop_last"
) %>%
arrange(-Propionibacteriaceae) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
filter(Family == "Propionibacteriaceae") %>%
mutate(
`Relative abundance` = Abundance
) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Propionibacteria)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "bottom",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1", limits = Propionibacteria)
Look at strains of Propionibacteriaceae:
data_long <- data_long %>%
mutate(
Propionibacterium_varieties = if_else(
Family == "Propionibacteriaceae" & is.na(Variety),
Propionibacteria, Variety
)
)
Propionibacterium_varieties <- data_long %>%
filter(Family == "Propionibacteriaceae") %>%
group_by(Propionibacterium_varieties) %>%
summarize(Abundance = sum(Abundance)/n_distinct(Sample)) %>%
arrange(-Abundance) %>%
pull(Propionibacterium_varieties)
data_long %>%
inner_join(
data_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Propionibacteriaceae = sum(Abundance[Family == "Propionibacteriaceae"]),
.groups = "drop_last"
) %>%
arrange(-Propionibacteriaceae) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
filter(Family == "Propionibacteriaceae") %>%
mutate(
`Relative abundance` = Abundance
) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Propionibacterium_varieties)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1", limits = Propionibacterium_varieties)
The null hypothesis cannot be rejected that Propionibacterium_acnes_unclassified, Propionibacteriaceae_unclassified, and GCF_000221145 are in fact the same strain.
All taxa from this family should be summarized as Propionibacteriaceae, with almost all reads being Propionibacterium_acnes
Look at Staphylococcaceae:
Staphylococci <- data_long %>%
filter(Family == "Staphylococcaceae") %>%
group_by(Species) %>%
summarize(Abundance = sum(Abundance)/n_distinct(Sample)) %>%
arrange(-Abundance) %>%
pull(Species) %>%
.[1:3]
data_long %>%
inner_join(
data_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Staphylococcaceae = sum(Abundance[Family == "Staphylococcaceae"]),
.groups = "drop_last"
) %>%
arrange(-Staphylococcaceae) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
filter(Family == "Staphylococcaceae") %>%
mutate(
`Relative abundance` = Abundance,
Staphylococci = if_else(Species %in% Staphylococci, Species, "Other Staphylococci")
) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Staphylococci)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(
palette = "Set1",
limits = Staphylococci %>% c("Other Staphylococci")
)
Which taxa should be summarized into Staphylococcus_caprae_capitis?
taxa_staph_caprae_capitis <- tax_table %>%
filter(Species == "Staphylococcus_caprae_capitis") %>%
pull(OTU)
tax_table %>%
filter(OTU %in% taxa_staph_caprae_capitis)
Which taxa should be summarized into Staphylococcus_epidermidis?
taxa_staph_epidermidis <- tax_table %>%
filter(Species == "Staphylococcus_epidermidis") %>%
pull(OTU)
tax_table %>%
filter(OTU %in% taxa_staph_epidermidis)
Which taxa should be summarized into Staphylococcus_hominis?
taxa_staph_hominis <- tax_table %>%
filter(Species == "Staphylococcus_hominis") %>%
pull(OTU)
tax_table %>%
filter(OTU %in% taxa_staph_hominis)
Which taxa should be summarized as other Staphylococci?
taxa_staph_other <- tax_table %>%
filter(
Family == "Staphylococcaceae" &
!OTU %in% c(taxa_staph_caprae_capitis, taxa_staph_epidermidis, taxa_staph_hominis)
) %>%
pull(OTU)
tax_table %>%
filter(OTU %in% taxa_staph_other)
Look at Polyomaviridae:
Polyomaviri <- data_long %>%
filter(Family == "Polyomaviridae") %>%
group_by(Species) %>%
summarize(Abundance = sum(Abundance)/n_distinct(Sample)) %>%
arrange(-Abundance) %>%
pull(Species)
data_long %>%
inner_join(
data_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Polyomaviridae = sum(Abundance[Family == "Polyomaviridae"]),
.groups = "drop_last"
) %>%
arrange(-Polyomaviridae) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
filter(Family == "Polyomaviridae") %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Species)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1", limits = Polyomaviri)
Which taxa should be summarized into Polyomavirus_HPyV6?
taxa_poly_hpyv6 <- tax_table %>%
filter(Species == "Polyomavirus_HPyV6") %>%
pull(OTU)
tax_table %>%
filter(OTU %in% taxa_poly_hpyv6)
Which taxa should be summarized into Polyomavirus_HPyV7
taxa_poly_hpyv7 <- tax_table %>%
filter(Species == "Polyomavirus_HPyV7") %>%
pull(OTU)
tax_table %>%
filter(OTU %in% taxa_poly_hpyv7)
Which taxa should be summarized into Merkel_cell_polyomavirus?
taxa_poly_merkel <- tax_table %>%
filter(Species == "Merkel_cell_polyomavirus") %>%
pull(OTU)
tax_table %>%
filter(OTU %in% taxa_poly_merkel)
Plot relative abundance of the major families, sorted by sebumeter score:
data_long %>%
inner_join(
data_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Sebumeter_Score = Sebumeter_Score[1],
.groups = "drop_last"
) %>%
arrange(Sebumeter_Score) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Major_Families)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "bottom",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
data_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Sebumeter_Score = Sebumeter_Score[1],
.groups = "drop_last"
) %>%
arrange(Sebumeter_Score) %>%
mutate(
Sample_No = row_number(),
`Sebumeter score` = Sebumeter_Score
) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Sebumeter score`, fill = "")
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "none",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
Plot relative abundance of the major families, sorted by Skicon score:
data_long %>%
inner_join(
data_long %>%
group_by(Sample) %>%
summarize(
Skicon_Score = Skicon_Score[1],
.groups = "drop_last"
) %>%
arrange(Skicon_Score) %>%
mutate(
Sample_No = row_number()
) %>%
select(Sample, Sample_No),
by = "Sample"
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(Sample_No, `Relative abundance`, fill = Major_Families)
) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "bottom",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
data_long %>%
group_by(Sample) %>%
summarize(
Skicon_Score = Skicon_Score[1],
.groups = "drop_last"
) %>%
arrange(Skicon_Score) %>%
mutate(
Sample_No = row_number(),
`Skicon score` = Skicon_Score
) %>%
ggplot() +
geom_col(
aes(Sample_No, `Skicon score`, fill = "")
) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
Define function for axis labels:
get_axis_labels <- function(x) str_c(
"PC",
1:length(x$values$Relative_eig),
" [",
(100 * x$values$Relative_eig) %>% round(1),
"%]"
)
Calculate ordination:
ordination <- physeq %>%
ordinate("PCoA", "bray")
Plot samples:
axis_labels <- get_axis_labels(ordination)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "Dominat_Propioni"
) +
scale_color_brewer(palette = "Set1", limits = c(T, F)) +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
## Warning in plot_ordination(physeq, ordination, type = "samples", color =
## "Dominat_Propioni"): Color variable was not found in the available data you
## provided.No color mapped.
plot_ordination(
physeq,
ordination,
type = "samples",
color = "ATTRIBUTE_Study_Group"
) +
scale_color_brewer(palette = "Set1") +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "ATTRIBUTE_Ethnic_Group"
) +
scale_color_brewer(palette = "Set1") +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "ATTRIBUTE_BiologicalSex"
) +
scale_color_brewer(palette = "Set1") +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "Sebumeter_Score"
) +
scale_color_viridis_c() +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "Skicon_Score"
) +
scale_color_viridis_c() +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "AgeInYears"
) +
scale_color_viridis_c() +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "Propionibacteriaceae"
) +
scale_color_brewer(palette = "Set1", limits = c(T, F)) +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
plot_ordination(
physeq,
ordination,
type = "samples",
color = "Polyomaviridae"
) +
scale_color_brewer(palette = "Set1", limits = c(T, F)) +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2]
)
Plot families:
plot_ordination(
physeq,
ordination,
type = "taxa",
color = "Major_Families"
) +
scale_color_brewer(palette = "Set1", limits = c(major_families, "Other families"), name = NULL) +
labs(
title = "PCoA, Bray-Curtis dissimilarity",
x = axis_labels[1], y = axis_labels[2],
)
Aggregate dataset by major taxa. Check whether the sum of the abundances within each sample is 1 and whether no taxa were lost:
data_major_taxa_long <- data_long %>%
mutate(
Major_Taxa = case_when(
OTU %in% taxa_poly_hpyv6 ~ "Polyomavirus HPyV6",
OTU %in% taxa_poly_hpyv7 ~ "Polyomavirus HPyV7",
OTU %in% taxa_poly_merkel ~ "Merkel Cell Polyomavirus",
OTU %in% taxa_staph_caprae_capitis ~ "Staphylococcus caprae or capitis",
OTU %in% taxa_staph_epidermidis ~ "Staphylococcus epidermidis",
OTU %in% taxa_staph_hominis ~ "Staphylococcus hominis",
OTU %in% taxa_staph_other ~ "Other Staphylococci",
T ~ Major_Families
),
Major_Taxa = Major_Taxa %>% factor(levels = c("Propionibacteriaceae", "Staphylococcus caprae or capitis", "Staphylococcus epidermidis", "Staphylococcus hominis", "Other Staphylococci", "Polyomavirus HPyV6", "Polyomavirus HPyV7", "Merkel Cell Polyomavirus", "Malasseziaceae", "Corynebacteriaceae", "Micrococcaceae", "Other families"))
) %>%
group_by(across(c(sample_attributes, Major_Families, Major_Taxa))) %>%
summarize(
N_Taxa = n(),
Abundance = sum(Abundance),
.groups = "drop"
)
## Warning: There was 1 warning in `group_by()`.
## ℹ In argument: `across(c(sample_attributes, Major_Families, Major_Taxa))`.
## Caused by warning:
## ! Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(sample_attributes)
##
## # Now:
## data %>% select(all_of(sample_attributes))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
data_major_taxa_long %>%
group_by(Sample) %>%
summarize(Abundance = sum(Abundance), .groups = "drop")
data_major_taxa_long %>%
group_by(Major_Taxa) %>%
summarize(N_Taxa = N_Taxa[1]) %>%
summarize(N_Taxa = sum(N_Taxa))
Transform to wide format and save to file:
data_major_taxa_wide <- data_major_taxa_long %>%
pivot_wider(id_cols = Major_Taxa, names_from = Sample, values_from = Abundance)
data_major_taxa_wide %>% write_tsv("Data/data_major_taxa_wide.tsv")
data_major_taxa_wide
Plot relative abundances:
data_major_taxa_long %>%
inner_join(
data_major_taxa_long %>%
filter(Major_Taxa == "Propionibacteriaceae") %>%
arrange(-Abundance) %>%
group_by(ATTRIBUTE_Study_Group) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Major_Taxa)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set3")
Plot relative abundances, sorted by sebumeter score:
data_major_taxa_long %>%
inner_join(
data_major_taxa_long %>%
filter(Major_Taxa == "Propionibacteriaceae") %>%
arrange(Sebumeter_Score) %>%
group_by(ATTRIBUTE_Study_Group) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Major_Taxa)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set3")
data_major_taxa_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Sebumeter_Score = Sebumeter_Score[1],
.groups = "drop_last"
) %>%
arrange(Sebumeter_Score) %>%
mutate(
Sample_No = row_number(),
`Sebumeter score` = Sebumeter_Score
) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Sebumeter score`, fill = "")
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "none",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
Plot relative abundances, sorted by sebumeter score:
data_major_taxa_long %>%
inner_join(
data_major_taxa_long %>%
filter(Major_Taxa == "Propionibacteriaceae") %>%
arrange(Skicon_Score) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(Sample_No, `Relative abundance`, fill = Major_Taxa)
) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set3")
data_major_taxa_long %>%
group_by(Sample) %>%
summarize(
Skicon_Score = Skicon_Score[1],
.groups = "drop_last"
) %>%
arrange(Skicon_Score) %>%
mutate(
Sample_No = row_number(),
`Skicon score` = Skicon_Score
) %>%
ggplot() +
geom_col(
aes(Sample_No, `Skicon score`, fill = "")
) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
Filter dataset for Propionibacteriaceae and Staphylococcaceae and re-scale the abundances within each sample to 1:
data_propi_staph_long <- data_major_taxa_long %>%
filter(Major_Families %in% c("Propionibacteriaceae", "Staphylococcaceae")) %>%
group_by(Sample) %>%
mutate(Abundance = Abundance / sum(Abundance)) %>%
ungroup()
data_propi_staph_long %>%
group_by(Sample) %>%
summarize(Abundance = sum(Abundance), .groups = "drop")
data_propi_staph_long %>%
group_by(Major_Taxa) %>%
summarize(N_Taxa = N_Taxa[1]) %>%
summarize(N_Taxa = sum(N_Taxa))
Transform to wide format and save to file:
data_propi_staph_wide <- data_propi_staph_long %>%
pivot_wider(id_cols = Major_Taxa, names_from = Sample, values_from = Abundance)
data_propi_staph_wide %>% write_tsv("Data/data_propi_staph_wide.tsv")
data_propi_staph_wide
Plot relative abundances:
data_propi_staph_long %>%
inner_join(
data_propi_staph_long %>%
filter(Major_Taxa == "Propionibacteriaceae") %>%
arrange(-Abundance) %>%
group_by(ATTRIBUTE_Study_Group) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Major_Taxa)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set3")
Plot relative abundances, sorted by sebumeter score:
data_propi_staph_long %>%
inner_join(
data_propi_staph_long %>%
filter(Major_Taxa == "Propionibacteriaceae") %>%
arrange(Sebumeter_Score) %>%
group_by(ATTRIBUTE_Study_Group) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Relative abundance`, fill = Major_Taxa)
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set3")
data_propi_staph_long %>%
group_by(ATTRIBUTE_Study_Group, Sample) %>%
summarize(
Sebumeter_Score = Sebumeter_Score[1],
.groups = "drop_last"
) %>%
arrange(Sebumeter_Score) %>%
mutate(
Sample_No = row_number(),
`Sebumeter score` = Sebumeter_Score
) %>%
ggplot() +
geom_col(
aes(interaction(Sample_No, ATTRIBUTE_Study_Group), `Sebumeter score`, fill = "")
) +
scale_x_discrete(guide = guide_axis_nested(n.dodge = 0, title = "Study group")) +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(angle = 90, vjust = 0.5, hjust=0),
legend.position = "none",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
Plot relative abundances, sorted by sebumeter score:
data_propi_staph_long %>%
inner_join(
data_propi_staph_long %>%
filter(Major_Taxa == "Propionibacteriaceae") %>%
arrange(Skicon_Score) %>%
mutate(
Sample_No = row_number()
) %>%
select(ATTRIBUTE_Study_Group, Sample, Sample_No),
by = c("ATTRIBUTE_Study_Group", "Sample")
) %>%
mutate(`Relative abundance` = Abundance) %>%
ggplot() +
geom_col(
aes(Sample_No, `Relative abundance`, fill = Major_Taxa)
) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "right",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set3")
data_propi_staph_long %>%
group_by(Sample) %>%
summarize(
Skicon_Score = Skicon_Score[1],
.groups = "drop_last"
) %>%
arrange(Skicon_Score) %>%
mutate(
Sample_No = row_number(),
`Skicon score` = Skicon_Score
) %>%
ggplot() +
geom_col(
aes(Sample_No, `Skicon score`, fill = "")
) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
legend.title = element_blank()
) +
scale_fill_brewer(palette = "Set1")
timestamp()
## ##------ Sun Jul 23 17:28:37 2023 ------##
sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggh4x_0.2.5 microbiome_1.22.0 phyloseq_1.44.0 lubridate_1.9.2
## [5] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1
## [9] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
## [13] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] ade4_1.7-22 tidyselect_1.2.0 viridisLite_0.4.2
## [4] farver_2.1.1 Biostrings_2.68.1 bitops_1.0-7
## [7] fastmap_1.1.1 RCurl_1.98-1.12 digest_0.6.33
## [10] timechange_0.2.0 lifecycle_1.0.3 cluster_2.1.4
## [13] survival_3.5-5 magrittr_2.0.3 compiler_4.3.0
## [16] rlang_1.1.1 sass_0.4.7 tools_4.3.0
## [19] igraph_1.5.0 utf8_1.2.3 yaml_2.3.7
## [22] data.table_1.14.8 knitr_1.43 labeling_0.4.2
## [25] bit_4.0.5 RColorBrewer_1.1-3 plyr_1.8.8
## [28] Rtsne_0.16 withr_2.5.0 BiocGenerics_0.46.0
## [31] grid_4.3.0 stats4_4.3.0 fansi_1.0.4
## [34] multtest_2.56.0 biomformat_1.28.0 colorspace_2.1-0
## [37] Rhdf5lib_1.22.0 scales_1.2.1 iterators_1.0.14
## [40] MASS_7.3-60 cli_3.6.1 rmarkdown_2.23
## [43] vegan_2.6-4 crayon_1.5.2 generics_0.1.3
## [46] rstudioapi_0.15.0.9000 reshape2_1.4.4 tzdb_0.4.0
## [49] ape_5.7-1 cachem_1.0.8 rhdf5_2.44.0
## [52] zlibbioc_1.46.0 splines_4.3.0 parallel_4.3.0
## [55] XVector_0.40.0 vctrs_0.6.3 Matrix_1.6-0
## [58] jsonlite_1.8.7 IRanges_2.34.1 hms_1.1.3
## [61] S4Vectors_0.38.1 bit64_4.0.5 foreach_1.5.2
## [64] jquerylib_0.1.4 glue_1.6.2 codetools_0.2-19
## [67] stringi_1.7.12 gtable_0.3.3 GenomeInfoDb_1.36.1
## [70] munsell_0.5.0 pillar_1.9.0 htmltools_0.5.5
## [73] rhdf5filters_1.12.1 GenomeInfoDbData_1.2.10 R6_2.5.1
## [76] vroom_1.6.3 evaluate_0.21 lattice_0.21-8
## [79] Biobase_2.60.0 highr_0.10 bslib_0.5.0
## [82] Rcpp_1.0.11 nlme_3.1-162 permute_0.9-7
## [85] mgcv_1.9-0 xfun_0.39 pkgconfig_2.0.3